% Start from a clean session: close every open figure window and remove all
% variables (and cached functions) from memory before the script runs.
close('all');
clear('all');

%  =======================================================================================================================================================
% 
%  Code Description: 
%  This code file generates the data underlying Figure 4 (Cross-fund holdings and fund returns) and Figure 5 (Cross-fund holdings and portfolio overlap) 
%  of Section 3.2.1, as well as Figure IA.1 (How large is the German fund sector?) from Internet Appendix A (Portfolio Look-through). 
% 
%  =======================================================================================================================================================
% 
%  Major output:
%  - dataset underlying the figures 4 and 5 as well as figure IA.1 from the internet appendix
%  
%  =======================================================================================================================================================
% 
%  General disclaimer:
%  This directory contains the replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


%% Set project directory

% Root folder of the replication package. The trailing backslash is required
% because sub-paths are appended to it by plain string concatenation.
% (Terminated with a semicolon so the path is not echoed to the console.)
projectPath = 'C:\ConnectedFunds_Codebase\';

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));



%  ================================================================== Preparations ========================================================================

% Load the vector of months from the cleaned fund data file
load(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'), 'umonth');

% All result arrays have one row per month and are NaN until filled.
% Four shapes are needed; build each once and reuse it as a template.
nMonths          = length(umonth);
nanScalarSeries  = NaN(nMonths, 1);   % one value per month
nanByFundType    = NaN(nMonths, 4);   % one column per fund category
nanOverlapStats  = NaN(nMonths, 3);   % three summary statistics per month
nanHhiStats      = NaN(nMonths, 5);   % five summary statistics per month

% Fund-level return table, grown month by month
Results_RE1_RE0 = [];

% Total assets as reported
Results_TotalAssets                     = nanScalarSeries;
Results_TotalAssets_ByFundType          = nanByFundType;
Results_TotalAssets_Spezial_ByFundType  = nanByFundType;

% Total asset holdings
Results_TotalAssetHoldings                      = nanScalarSeries;
Results_TotalAssetHoldings_ByFundType           = nanByFundType;
Results_TotalAssetHoldings_Spezial_ByFundType   = nanByFundType;

% Total assets after look-through
Results_TotalAssetsHat                      = nanScalarSeries;
Results_TotalAssetsHat_ByFundType           = nanByFundType;
Results_TotalAssetsHat_Spezial_ByFundType   = nanByFundType;

% Cosine portfolio overlap
Results_OverlapAll      = nanOverlapStats;
Results_Overlap         = nanOverlapStats;
Results_OverlapHat      = nanOverlapStats;
Results_OverlapAllHat   = nanOverlapStats;

% Polk portfolio overlap
Results_OverlapPolkAll      = nanOverlapStats;
Results_OverlapPolk         = nanOverlapStats;
Results_OverlapPolkHat      = nanOverlapStats;
Results_OverlapPolkAllHat   = nanOverlapStats;

% Portfolio concentration (HHI): all funds, Publikum funds, Spezial funds
Results_HHI     = nanHhiStats;
Results_HHI_Hat = nanHhiStats;

Results_HHI_Publikum        = nanHhiStats;
Results_HHI_Hat_Publikum    = nanHhiStats;

Results_HHI_Spezial     = nanHhiStats;
Results_HHI_Hat_Spezial = nanHhiStats;

% Remove the templates again so only the result arrays remain in the workspace
clear nMonths nanScalarSeries nanByFundType nanOverlapStats nanHhiStats




%  ================================================================== Loop over time and build dataset ========================================================================

% Load the fund-level panel ONCE and apply the month-independent filters.
% Only the variable IFS_All is read from the file, so nothing else stored in
% the MAT-file can overwrite workspace variables (e.g. the Results_* arrays).
fundData = load(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'), 'IFS_All');
IFS_All  = fundData.IFS_All;
clear fundData;

% Keep only the columns this script reads, to limit memory while the panel
% stays resident during the loop.
IFS_All = IFS_All(:, {'DATUM', 'ISIN', 'artmittel', 'etf', 'BANKG', 'bilanzsumme', 'ReturnDiv', 'SPEZIAL'});

% Filter fund data: drop fund types 4, 6 and everything above 7, and drop ETFs
isExcluded = IFS_All.artmittel == 4 | IFS_All.artmittel == 6 | IFS_All.artmittel > 7 | IFS_All.etf == 1;
IFS_All(isExcluded, :) = [];
clear isExcluded;

% Summary statistics used for the overlap measures (mean, median, 75% quantile)
% and for the HHI measures (mean, std, 25% quantile, median, 75% quantile)
summarize3 = @(x) [nanmean(x(:)) nanmedian(x(:)) quantile(x(:), 0.75)];
summarize5 = @(x) [nanmean(x(:)) nanstd(x(:)) quantile(x(:), 0.25) nanmedian(x(:)) quantile(x(:), 0.75)];

% Fund categories (values of artmittel) reported in the "ByFundType" columns:
% equity fund, bond fund, mixed security fund, fund-of-fund
fundTypeCodes = [1 2 3 7];

% NOTE(review): the loop stops at t = 2, so the rows for the first month keep
% their NaN initial values - confirm this is intended.
for t = length(umonth) : -1 : 2

    % Current month (not called "date", which would shadow MATLAB's built-in)
    monthId = umonth(t);

    % Select this month's funds and drop observations with non-positive BANKG
    % or with non-positive / missing total assets (bilanzsumme)
    IFS_All_t = IFS_All(IFS_All.DATUM == monthId, :);
    isInvalid = IFS_All_t.BANKG <= 0 | IFS_All_t.bilanzsumme <= 0 | isnan(IFS_All_t.bilanzsumme);
    IFS_All_t(isInvalid, :) = [];
    IFS_All_t = sortrows(IFS_All_t, 'ISIN', 'ascend');

    % Get funds' contemporary holdings (only the variable IFS_Holdings is read)
    holdingsData = load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_', num2str(monthId), '.mat'), 'IFS_Holdings');
    IFS_Holdings = holdingsData.IFS_Holdings;
    clear holdingsData;
    IFS_Holdings(IFS_Holdings.AMOUNT <= 0, :) = [];

    % Progress message
    fprintf('Month %s: total reported holdings %g\n', num2str(monthId), sum(IFS_Holdings.AMOUNT));

    % Restrict both datasets to funds that appear in both of them
    ufund        = intersect(IFS_Holdings.ISIN, IFS_All_t.ISIN);
    IFS_All_t    = IFS_All_t(ismember(IFS_All_t.ISIN, ufund), :);
    IFS_Holdings = IFS_Holdings(ismember(IFS_Holdings.ISIN, ufund), :);

    uasset = unique(IFS_Holdings.SECCODE);

    % Generate cross-fund and cross-asset network:
    % Network(i,j) = total AMOUNT that fund i holds of security j
    [~, fundIdx]  = ismember(IFS_Holdings.ISIN, ufund);
    [~, assetIdx] = ismember(IFS_Holdings.SECCODE, uasset);

    Network = accumarray([fundIdx assetIdx], IFS_Holdings.AMOUNT, [length(ufund) length(uasset)]);

    % Cross-fund network: securities whose code equals the ISIN of a fund in
    % ufund are fund shares; move those columns into a fund-by-fund matrix
    [~, fundCol]                = ismember(ufund, uasset);
    isHeldFund                  = fundCol > 0;
    Network_Fund                = zeros(length(ufund));
    Network_Fund(:, isHeldFund) = Network(:, fundCol(isHeldFund));

    % Cross-asset network: what remains after removing the fund-share columns
    Network(:, fundCol(isHeldFund)) = [];
    uasset(fundCol(isHeldFund))     = [];

    % Fund-level vectors (one entry per row of IFS_All_t).
    % Row sums use an explicit dimension so they stay per-fund vectors even
    % when a matrix has only a single column.
    cash        = IFS_All_t.BANKG;
    fund        = sum(Network_Fund, 2);     % holdings of other funds' shares
    holdings    = sum(Network, 2);          % holdings of all other securities
    fund_return = IFS_All_t.ReturnDiv;
    isin        = IFS_All_t.ISIN;
    totalassets = IFS_All_t.bilanzsumme;

    w_fund = fund ./ totalassets;

    % Clean data: residual position not explained by reported holdings, floored at zero
    other = totalassets - holdings - fund;
    other(other < 0) = 0;

    % Apply Equation 7 and reverse engineer fund returns without the cross-fund holdings.
    % Missing returns are set to zero and returns are capped at +/- 0.8.
    RE1 = fund_return;
    RE1(isnan(RE1)) = 0;
    RE1(RE1 > 0.8)  = 0.8;
    RE1(RE1 < -0.8) = -0.8;

    % Dividing row i of Network_Fund by totalassets(i) equals
    % inv(diag(totalassets)) * Network_Fund (up to floating-point rounding),
    % but avoids a dense matrix inverse and product.
    fundWeights = bsxfun(@rdivide, Network_Fund, totalassets);
    RE0 = 100 * (eye(length(totalassets)) - fundWeights) * RE1;
    RE1 = 100 * RE1;

    % Build table containing RE1 and RE0 over time. Column names are given
    % explicitly so they do not depend on the names of workspace variables.
    returnsTable = table(repmat(monthId, length(RE1), 1), isin, w_fund, totalassets, RE1, RE0, ...
                         'VariableNames', {'DATUM', 'ISIN', 'w_fund', 'totalassets', 'RE1', 'RE0'});

    if isempty(Results_RE1_RE0)
        Results_RE1_RE0 = returnsTable;
    else
        Results_RE1_RE0 = [Results_RE1_RE0; returnsTable];
    end

    % Run look-through function and generate funds' portfolios after looking through fund shares
    [PortfoliosLook, ~, ~, v] = PortfolioLookthrough_ConnFunds_Analytical(Network, Network_Fund, other);
    v = sum(v, 2);

    % Asset-level statistics (one row per security in uasset). Column sums use
    % an explicit dimension so they remain per-asset even with a single fund.
    indirectExcess                     = PortfoliosLook - Network;
    indirectExcess(indirectExcess < 0) = 0;
    indirectExcess                     = sum(indirectExcess, 1);

    % Each fund's share in the total holdings of an asset. A zero column total
    % gives 0/0 = NaN (not Inf), so all non-finite shares are set to zero.
    shareReported                            = bsxfun(@rdivide, Network, sum(Network, 1));
    shareReported(~isfinite(shareReported))  = 0;
    shareLook                                = bsxfun(@rdivide, PortfoliosLook, sum(PortfoliosLook, 1));
    shareLook(~isfinite(shareLook))          = 0;

    % The table must be named "tmp": that is the variable name stored in the file
    tmp = table(repmat(monthId, length(uasset), 1), uasset, sum(Network, 1)', ...
                sum(Network > 0, 1)', sum(PortfoliosLook > 0, 1)', ...
                sum(shareReported.^2, 1)', sum(shareLook.^2, 1)', indirectExcess', ...
                'VariableNames', {'DATUM', 'ISIN', 'TotalHoldings', 'Nfunds_reported', 'Nfunds_lookthrough', ...
                                  'HHI_reported', 'HHI_lookthrough', 'Indirect_Minus_Direct'});

    % Save asset level results in t
    save(strcat(projectPath, 'Data\CrossholdingsMatter\tmp_Lookthrough_', num2str(umonth(t))), 'tmp');
    clear tmp indirectExcess shareReported shareLook returnsTable;

    % Generate overall total assets
    Results_TotalAssets(t)          = nansum(totalassets);
    Results_TotalAssetHoldings(t)   = nansum(Network(:)) + nansum(cash);
    Results_TotalAssetsHat(t)       = nansum(v);

    % Generate total assets by fund category (equity fund, bond fund, mixed security fund or fund-of-fund)
    for k = 1:4

        isType        = IFS_All_t.artmittel == fundTypeCodes(k);
        isTypeSpezial = isType & IFS_All_t.SPEZIAL == 1;

        Results_TotalAssets_ByFundType(t,k)         = nansum(totalassets(isType));
        Results_TotalAssets_Spezial_ByFundType(t,k) = nansum(totalassets(isTypeSpezial));

        % Asset holdings by category, using the same definition as the
        % aggregate Results_TotalAssetHoldings above (securities plus cash).
        % These arrays are preallocated and saved, so they are filled here.
        typeHoldings        = Network(isType, :);
        typeSpezialHoldings = Network(isTypeSpezial, :);
        Results_TotalAssetHoldings_ByFundType(t,k)         = nansum(typeHoldings(:)) + nansum(cash(isType));
        Results_TotalAssetHoldings_Spezial_ByFundType(t,k) = nansum(typeSpezialHoldings(:)) + nansum(cash(isTypeSpezial));

        Results_TotalAssetsHat_ByFundType(t,k)         = nansum(v(isType));
        Results_TotalAssetsHat_Spezial_ByFundType(t,k) = nansum(v(isTypeSpezial));

    end

    % Compute portfolio overlap (reported portfolios vs. look-through portfolios)
    Overlap    = ComputeSimilarity_Cosine([Network Network_Fund]);
    OverlapHat = ComputeSimilarity_Cosine(PortfoliosLook);

    OverlapPolk    = ComputeSimilarity_Polk([Network Network_Fund]);
    OverlapHatPolk = ComputeSimilarity_Polk(PortfoliosLook);

    Results_OverlapAll(t,:)     = summarize3(Overlap);
    Results_OverlapPolkAll(t,:) = summarize3(OverlapPolk);

    % Assess portfolio overlap conditional on funds' holding fund shares:
    % keep only rows/columns of funds whose cross-fund holdings are not zero
    holdsFunds = fund ~= 0;

    % Cosine overlap
    Results_Overlap(t,:)    = summarize3(Overlap(holdsFunds, holdsFunds));
    Results_OverlapHat(t,:) = summarize3(OverlapHat(holdsFunds, holdsFunds));

    % Polk overlap
    Results_OverlapPolk(t,:)    = summarize3(OverlapPolk(holdsFunds, holdsFunds));
    Results_OverlapPolkHat(t,:) = summarize3(OverlapHatPolk(holdsFunds, holdsFunds));

    % Compute hhi to assess cross-fund holding effects on funds' asset concentration
    hhi    = Compute_HHI([Network Network_Fund]);
    hhiHat = Compute_HHI(PortfoliosLook);

    Results_HHI(t,:)     = summarize5(hhi);
    Results_HHI_Hat(t,:) = summarize5(hhiHat);

    isSpezial = IFS_All_t.SPEZIAL == 1;
    Results_HHI_Spezial(t,:)     = summarize5(hhi(isSpezial));
    Results_HHI_Hat_Spezial(t,:) = summarize5(hhiHat(isSpezial));

    isPublikum = IFS_All_t.SPEZIAL == 0;
    Results_HHI_Publikum(t,:)     = summarize5(hhi(isPublikum));
    Results_HHI_Hat_Publikum(t,:) = summarize5(hhiHat(isPublikum));
end

% The fund panel is no longer needed once all months are processed
clear IFS_All;

% Variables that make up the output dataset.
% Results_TotalAssetHoldings is filled inside the monthly loop and is
% therefore stored together with the other aggregate series.
resultVars = {'umonth', 'Results_RE1_RE0', ...
              'Results_TotalAssets', 'Results_TotalAssetsHat', 'Results_TotalAssetHoldings', ...
              'Results_TotalAssets_ByFundType', 'Results_TotalAssets_Spezial_ByFundType', ...
              'Results_TotalAssetHoldings_ByFundType', 'Results_TotalAssetHoldings_Spezial_ByFundType', ...
              'Results_TotalAssetsHat_ByFundType', 'Results_TotalAssetsHat_Spezial_ByFundType', ...
              'Results_Overlap', 'Results_OverlapHat', 'Results_OverlapAll', ...
              'Results_OverlapPolk', 'Results_OverlapPolkHat', 'Results_OverlapPolkAll', ...
              'Results_HHI', 'Results_HHI_Hat', 'Results_HHI_Publikum', 'Results_HHI_Hat_Publikum', ...
              'Results_HHI_Spezial', 'Results_HHI_Hat_Spezial'};

% Save dataset.
% The output path is built and the file is written BEFORE the workspace is
% cleaned: projectPath is not one of the retained variables, so it would no
% longer exist after the housekeeping step. Listing the variables explicitly
% also guarantees that only the result variables end up in the file.
outputFile = strcat(projectPath, 'Data\CrossholdingsMatter\CrossHoldingsMatter_OverTime.mat');
save(outputFile, resultVars{:});

% Do some housekeeping: remove everything except the result variables
clearvars('-except', resultVars{:});